package com.rock.crawl.actor

import akka.actor.Actor
import com.rock.crawl.message.CrawlStart
import akka.actor.Props
import com.rock.crawl.message.CrawlTask
import com.rock.crawl.Crawler
import scala.collection.mutable.ArrayBuffer
import com.rock.crawl.message.CrawlLinks
import java.util.HashSet
import scala.collection.JavaConversions._
import java.net.URL
/**
 * Master actor of the crawl: receives a seed batch ([[CrawlStart]]) or links
 * discovered by workers ([[CrawlLinks]]), dedupes them against the set of
 * already-dispatched pages, and hands each new URL to a freshly spawned
 * [[CrawlWorkerActor]].
 *
 * NOTE(review): state lives only in this actor's memory; an actor restart
 * loses the dedup set, so we deliberately avoid letting bad input escape
 * `receive` as an exception (see `dispatchTasks`).
 *
 * @param crawler supplies the seed URLs (`source`), the URL predicate
 *                (`filter`) and the page `processor` forwarded to workers
 *                — semantics assumed from usage here; confirm against Crawler.
 */
class CrawlMasterActor(crawler: Crawler) extends Actor {

  /** Canonical ids (see `idOfUrl`) of every URL already dispatched. */
  private val crawledPages = new HashSet[String]()

  def receive = {
    case CrawlStart       => dispatchTasks(crawler.source)
    case CrawlLinks(urls) => dispatchTasks(urls)
  }

  /** Spawns a worker child and hands it one URL plus the page processor. */
  private def dispatchTask(url: String): Unit = {
    val child = context.actorOf(Props[CrawlWorkerActor])
    child ! CrawlTask(url, crawler.processor)
  }

  /**
   * Dedupes, filters and dispatches a batch of URLs.
   *
   * A malformed URL is logged and skipped rather than allowed to throw out
   * of `receive`: an actor restart would discard `crawledPages` and cause
   * every page to be re-crawled.
   */
  private def dispatchTasks(urls: Seq[String]): Unit = {
    println("receive task...")
    for (url <- urls) {
      try {
        if (!hasCrawled(url) && crawler.filter(url)) {
          // Record BEFORE dispatching so the same URL appearing twice in
          // one batch is still dispatched only once.
          crawledPages.add(idOfUrl(url))
          dispatchTask(url)
        }
      } catch {
        case e: MalformedURLException =>
          println("skipping malformed url: " + url + " (" + e.getMessage + ")")
      }
    }
    println("crawledPages size:" + crawledPages.size())
  }

  /** True when the URL's canonical id has already been dispatched. */
  private def hasCrawled(url: String): Boolean =
    crawledPages.contains(idOfUrl(url))

  /**
   * Canonical identity of a URL: `protocol://host[:port]path[?query]`.
   * Drops the fragment (and userinfo) on purpose — they do not identify a
   * distinct page. The explicit port is kept: previously it was discarded,
   * so `http://h:8080/p` and `http://h/p` collided and one of the two
   * distinct pages was never crawled.
   *
   * @throws MalformedURLException if `url` is not a valid URL
   */
  private def idOfUrl(url: String): String = {
    val u = new URL(url)
    // URL.getPort returns -1 when no explicit port is present.
    val port = if (u.getPort == -1) "" else ":" + u.getPort
    val query = u.getQuery
    val q = if (query == null || query.trim().isEmpty()) "" else "?" + query
    u.getProtocol() + "://" + u.getHost() + port + u.getPath() + q
  }
}